import os
from sklearn.utils import shuffle


def generata_train_val(data_path_root='/home/xiaomin/wxm/Data/KaggleCCS',
                       num_train=1200):
    """Write shuffled train/val image lists for a class-per-folder dataset.

    Scans every sub-folder of ``<data_path_root>/train`` whose name does not
    start with a dot, builds one line per directory entry of the form
    ``<folder>/<image_file> <cid>``, shuffles all lines, and writes the first
    ``num_train`` of them to ``<data_path_root>/DataForCaffe/train.txt`` and
    the remainder to ``<data_path_root>/DataForCaffe/val.txt``.

    Args:
        data_path_root: Dataset root. It must contain a ``train`` folder and
            an already existing ``DataForCaffe`` folder (this function does
            not create it).
        num_train: Number of shuffled entries written to ``train.txt``;
            every entry after that goes to ``val.txt``. Negative values are
            treated as 0.

    Raises:
        ValueError: If a class folder name does not end in a digit.
        OSError: If a directory cannot be listed or an output file cannot be
            opened (``IOError`` for the latter on Python 2).
    """
    train_dir = os.path.join(data_path_root, 'train')
    list_dir = os.path.join(data_path_root, 'DataForCaffe')
    txt_path_train = os.path.join(list_dir, 'train.txt')
    txt_path_val = os.path.join(list_dir, 'val.txt')

    lines = []
    for folder in os.listdir(train_dir):
        if folder.startswith('.'):
            continue
        # The class id is the last character of the folder name, read as a
        # digit and shifted to be zero-based. NOTE(review): this only handles
        # a single trailing digit -- confirm no folder needs a larger number.
        cid = int(folder[-1]) - 1

        for image_file in os.listdir(os.path.join(train_dir, folder)):
            # Parenthesised form prints the same text on Python 2 and 3.
            print(image_file)
            lines.append(folder + '/' + image_file + ' ' + str(cid))
    lines = shuffle(lines)

    split = max(0, num_train)
    # Open the outputs only after the scan succeeded, so a failure while
    # listing images does not truncate existing list files; the ``with``
    # blocks guarantee the files are flushed and closed.
    with open(txt_path_train, 'w') as txt_train:
        txt_train.writelines(line + '\n' for line in lines[:split])
    with open(txt_path_val, 'w') as txt_val:
        txt_val.writelines(line + '\n' for line in lines[split:])

# Script entry point: generate the list files only when this module is run
# directly, not when it is imported.
if __name__ == '__main__':
    generata_train_val()